op {
  graph_op_name: "ApplyRMSProp"
  in_arg {
    name: "var"
    description: <<END
Should be from a Variable().
END
  }
  in_arg {
    name: "ms"
    description: <<END
Should be from a Variable().
END
  }
  in_arg {
    name: "mom"
    description: <<END
Should be from a Variable().
END
  }
  in_arg {
    name: "lr"
    description: <<END
Scaling factor. Must be a scalar.
END
  }
  in_arg {
    name: "rho"
    description: <<END
Decay rate. Must be a scalar.
END
  }
  in_arg {
    name: "epsilon"
    description: <<END
Ridge term. Must be a scalar.
END
  }
  in_arg {
    name: "grad"
    description: <<END
The gradient.
END
  }
  out_arg {
    name: "out"
    description: <<END
Same as "var".
END
  }
  attr {
    name: "use_locking"
    description: <<END
If `True`, updating of the var, ms, and mom tensors is protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
END
  }
  summary: "Update \'*var\' according to the RMSProp algorithm."
  description: <<END
Note that in the dense implementation of this algorithm, ms and mom will
update even if the grad is zero, but in a sparse implementation, ms and
mom will not update in iterations during which the grad is zero.

mean_square = decay * mean_square + (1-decay) * gradient ** 2
Delta = learning_rate * gradient / sqrt(mean_square + epsilon)

ms <- rho * ms_{t-1} + (1-rho) * grad * grad
mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
var <- var - mom
END
}
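# The following commented-out Python/NumPy snippet is a minimal, illustrative
# sketch of the dense update formulas in the description above. It is not part
# of the TensorFlow API; the function name and in-place convention are
# assumptions, and `momentum` corresponds to the momentum term in the
# "mom <- momentum * mom_{t-1} + ..." formula.
#
#   import numpy as np
#
#   def rmsprop_step(var, ms, mom, grad, lr, rho, momentum, epsilon):
#       # ms <- rho * ms_{t-1} + (1 - rho) * grad * grad
#       ms[...] = rho * ms + (1.0 - rho) * grad * grad
#       # mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
#       mom[...] = momentum * mom + lr * grad / np.sqrt(ms + epsilon)
#       # var <- var - mom
#       var[...] -= mom
#       return var, ms, mom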
